import requests
import re
import json
import csv

# Request headers sent with the HTTP request so it looks like a desktop browser.
headers = {
    # Browser identification string (User-Agent)
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
}
url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner#tab4'

# CSV column names. Each name is also the key read from every entry of
# caseList, so the spelling (including 'crued') must stay exactly as is.
fields = ['area', 'curConfirm', 'curConfirmRelative', 'confirmed', 'crued', 'died']

# 1. Send the request.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of parsing an
# error page.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()

# 2. Get the page source as text.
html_data = response.text

# 3. Parse the data with a regular expression.
# The pattern captures everything between '"component":[' and the last '],'
# on the same line ('.*' is greedy and does not cross newlines). A raw string
# is used so that '\[' is not treated as an (invalid) string escape.
match = re.search(r'"component":\[(.*)\],', html_data)
if match is None:
    raise ValueError('"component" JSON payload not found in the page source')
json_str = match.group(1)
# Convert the JSON text into a dictionary.
json_dict = json.loads(json_str)
caseList = json_dict['caseList']

# 4. Save the data.
# The file is opened only after fetching and parsing succeeded, and only once
# for all rows instead of once per row.
with open('../data.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    # In append mode the position starts at the end of the file, so 0 means
    # the file is empty: write the header only then, so that repeated runs do
    # not insert extra header rows between the data rows.
    if f.tell() == 0:
        csv_writer.writerow(fields)
    for case in caseList:
        row = [case[name] for name in fields]
        print(*row)
        csv_writer.writerow(row)
